import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
# from dataset.vocab import Vocab, tokenizer
from vocab import Vocab, tokenizer

# Load the test-set CSV (the path is relative to the current working directory)
# and expose its rows as a NumPy array.
df = pd.read_csv("../data/damo_mt_news_testsets_zh2en/damo_mt_testsets_zh2en_news_wmt18.csv")
data = df.to_numpy()


def _cell_len(cell):
    """Return the character count of a CSV cell, counting missing values as 0.

    ``pd.read_csv`` parses empty cells as float NaN by default, and calling
    ``len()`` on a float raises ``TypeError``. Those are exactly the rows this
    scan is meant to surface, so they are measured as length 0. Other
    non-string scalars (e.g. numbers) are measured by their string form.
    """
    if isinstance(cell, str):
        return len(cell)
    if pd.isna(cell):
        return 0
    return len(str(cell))


# Sanity scan: for every row whose first or second column holds at most one
# character, print the row index followed by the two column lengths.
for i, item in enumerate(data):
    first_len = _cell_len(item[0])
    second_len = _cell_len(item[1])
    if first_len <= 1 or second_len <= 1:
        print(i)
        print(first_len, second_len)
